from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from io import StringIO
 
# Extract the text of every page of ../test.pdf and print it to stdout.
# The file is opened in binary mode and handed to pdfminer as a byte stream.
with open('../test.pdf', 'rb') as file:
    # Resource manager passed to both the converter and the interpreter.
    resource_manager = PDFResourceManager()

    # In-memory text buffer that receives the extracted text.
    output = StringIO()

    # Converter that writes the text it receives into `output`,
    # using the default layout-analysis parameters.
    converter = TextConverter(resource_manager, output, laparams=LAParams())

    try:
        # Interpreter that feeds each page's content to the converter.
        interpreter = PDFPageInterpreter(resource_manager, converter)

        # Process the document one page at a time.
        for page in PDFPage.get_pages(file):
            interpreter.process_page(page)

        # Read the accumulated text before the buffer is closed;
        # getvalue() on a closed StringIO raises ValueError.
        text = output.getvalue()
    finally:
        # Release the converter and the buffer even if parsing fails.
        # The converter is closed first, while its output stream is still open.
        converter.close()
        output.close()

    print(text)